package wordsegementation;

import org.apache.commons.lang3.StringUtils;

import java.io.*;
import java.util.StringTokenizer;

public class TernarySearchTrie {

    public final class TSTNode {
        public String data;

        protected TSTNode left;
        protected TSTNode mid;
        protected TSTNode right;

        public char spliter;

        public TSTNode(char key) {
            this.spliter = key;
        }

        public String toString() {
            return "data:" + data + " spliter:" + spliter;
        }
    }

    public TSTNode rootNode;

    public TernarySearchTrie(String fileName) throws UnsupportedEncodingException {
        try {
            FileInputStream file = new FileInputStream(fileName);
            BufferedReader in = new BufferedReader(new InputStreamReader(file, "GBK"));
            String line = null;
            try {
                while ((line = in.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(line, "\t");
                    String key = st.nextToken();

                    TSTNode currentNode = addWord(key);
                    currentNode.data = key;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    protected TSTNode addWord(String key) {
        if (StringUtils.isBlank(key)) {
            return null;
        }
        int charIndex = 0;
        if (rootNode == null) {
            rootNode = new TSTNode(key.charAt(charIndex));
        }
        TSTNode currentNode = rootNode;

        while (true) {
            int charComp = key.charAt(charIndex) - currentNode.spliter;

            if (charComp == 0) {
                charIndex++;
                if (charIndex == key.length()) {
                    return currentNode;
                }
                if (currentNode.mid == null) {
                    currentNode.mid = new TSTNode(key.charAt(charIndex));
                }
                currentNode = currentNode.mid;
            } else if (charComp < 0) {
                //小于
                if (currentNode.left == null) {
                    currentNode.left = new TSTNode(key.charAt(charIndex));
                }
                currentNode = currentNode.left;
            } else {
                if (currentNode.right == null) {
                    currentNode.right = new TSTNode(key.charAt(charIndex));
                }
                currentNode = currentNode.right;
            }
        }
    }

    public String matchLong(String key, int offset) {
        if (offset >= key.length()) {
            return null;
        }
        String ret = null;
        if (StringUtils.isBlank(key) || rootNode == null) {
            return ret;
        }

        TSTNode currentNode = rootNode;
        int charIndex = offset;

        while (true) {
            if (currentNode == null) {
                return ret;
            }
            int charComp = key.charAt(charIndex) - currentNode.spliter;

            if (charComp == 0) {
                charIndex++;

                if (currentNode.data != null) {
                    //匹配最长候选词
                    ret = currentNode.data;
                }
                if (charIndex == key.length()) {
                    //已经匹配完成
                    return ret;
                }
                currentNode = currentNode.mid;
            } else if (charComp < 0) {
                currentNode = currentNode.left;
            } else {
                currentNode = currentNode.right;
            }
        }
    }

    public static void main(String[] args) throws Exception {
        TernarySearchTrie dic = new TernarySearchTrie("SDIC.txt");

        String sentence = "参加了许多双边和多边经济和金融论坛";
        int offset = 0;
        System.out.println("offset:" + offset);
        String ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);

        offset = offset + ret.length();
        System.out.println("offset:" + offset);
        ret = dic.matchLong(sentence, offset);
        System.out.println(sentence + " match:" + ret);
    }

}
